# Written by Aunoy Poddar July 21st, 2022
# Convert the currently open RStudio .Rmd document into a plain .R script
# and open the result in the editor.
current_file <- rstudioapi::getActiveDocumentContext()$path
# Swap the extension; note '.Rmd' is used as a regex here, so the leading
# dot matches any character — TODO confirm a literal match was intended.
output_file <- stringr::str_replace(current_file, '.Rmd', '.R')
# Extract the code chunks from the Rmd into the .R file.
knitr::purl(current_file, output = output_file)
# Open the generated script.
file.edit(output_file)
library(Seurat)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching SeuratObject
library(tictoc)
library(ggplot2)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✔ tibble 3.1.8 ✔ dplyr 1.0.8
✔ tidyr 1.2.0 ✔ stringr 1.4.0
✔ readr 2.1.2 ✔ forcats 0.5.1
✔ purrr 0.3.4
── Conflicts ───────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
library(gridExtra)
Attaching package: ‘gridExtra’
The following object is masked from ‘package:dplyr’:
combine
library(png)
library(cowplot)
Attaching package: ‘cowplot’
The following object is masked from ‘package:patchwork’:
align_plots
library(magick)
Linking to ImageMagick 6.9.10.23
Enabled features: fontconfig, freetype, fftw, lcms, pango, webp, x11
Disabled features: cairo, ghostscript, heic, raw, rsvg
Using 80 threads
library(scales)
Attaching package: ‘scales’
The following object is masked from ‘package:purrr’:
discard
The following object is masked from ‘package:readr’:
col_factor
library(packcircles)
library(ggalt)
Registered S3 methods overwritten by 'ggalt':
method from
grid.draw.absoluteGrob ggplot2
grobHeight.absoluteGrob ggplot2
grobWidth.absoluteGrob ggplot2
grobX.absoluteGrob ggplot2
grobY.absoluteGrob ggplot2
library(cccd)
Loading required package: igraph
Attaching package: ‘igraph’
The following objects are masked from ‘package:dplyr’:
as_data_frame, groups, union
The following objects are masked from ‘package:purrr’:
compose, simplify
The following object is masked from ‘package:tidyr’:
crossing
The following object is masked from ‘package:tibble’:
as_data_frame
The following objects are masked from ‘package:stats’:
decompose, spectrum
The following object is masked from ‘package:base’:
union
library(stringr)
library(ggfortify)
Registered S3 method overwritten by 'ggfortify':
method from
fortify.table ggalt
library(circlize)
========================================
circlize version 0.4.15
CRAN page: https://cran.r-project.org/package=circlize
Github page: https://github.com/jokergoo/circlize
Documentation: https://jokergoo.github.io/circlize_book/book/
If you use it in published research, please cite:
Gu, Z. circlize implements and enhances circular visualization
in R. Bioinformatics 2014.
This message can be suppressed by:
suppressPackageStartupMessages(library(circlize))
========================================
Attaching package: ‘circlize’
The following object is masked from ‘package:igraph’:
degree
library(reshape2)
Attaching package: ‘reshape2’
The following object is masked from ‘package:tidyr’:
smiths
library(igraph)
#library("leiden")
## Directory layout for the spatial-transcriptomics analysis.
st_root <- '/home/aunoy/st/arc_profiling/st_analysis'
data_dir <- file.path(st_root, 'hand_annotated_data', 'rethresholded')
clump_dir <- file.path(st_root, 'hand_annotated_data', 'clumps')
meta_dir <- file.path(st_root, 'hand_annotated_data', 'overlay')
output_dir_plot <- file.path(st_root, 'results', 'plots')
output_dir_tbls <- file.path(st_root, 'results', 'tables')

## Clump/meta intersection table; the CSV has no header row.
meta_ntrscts <- file.path(clump_dir, 'meta', 'META_ntrsct.csv') %>%
  read.csv(header = FALSE) %>%
  as_tibble()
## Load, clean and spatially embed slice 408, then build a Seurat object
## from its gene-count columns (metadata columns dropped first).
df_408 <- load_slice(slice = "408") %>%
  clean_408() %>%
  embed_horizontal_408()
rownames(df_408) <- seq_len(nrow(df_408))

## Genes must be rows for CreateSeuratObject, hence the transpose.
expr_408 <- df_408 %>%
  dplyr::select(-c(area, IMAGE.NAME, X_horz, Y_horz, clump)) %>%
  t()
jy_408 <- CreateSeuratObject(expr_408)

## Carry the source image name along as per-cell metadata.
jy_408$image <- df_408$IMAGE.NAME

## Log-normalize counts onto a common scale.
jy_408 <- NormalizeData(jy_408, scale.factor = 1e5)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
## Pull the log-normalized matrix and zero out weak signal (< 3).
normed = GetAssayData(jy_408, slot = 'data')
normed[normed < 3] = 0
## NOTE(review): this loop computes each gene's median over its non-zero
## cells but never applies it — the per-gene zeroing step used in the
## merged (jy_all) pipeline further down, i.e.
##   normed[gene_name, normed[gene_name, ] < mdn_gene_expr] = 0
## is absent here. TODO confirm whether that omission is intentional.
for(gene_name in rownames(jy_408)) {
mdn_gene_expr = median(normed[gene_name, normed[gene_name, ] > 0])
}
## Write the matrix back into the Seurat object.
jy_408 <- SetAssayData(jy_408, slot = 'data', normed)
jy_408 <- FindVariableFeatures(jy_408, selection.method = "vst")
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
all.genes <- rownames(jy_408)
jy_408 <- ScaleData(jy_408, features = all.genes)
Centering and scaling data matrix
|
| | 0%
|
|=============================================================================================| 100%
jy_408 <- RunPCA(jy_408, approx = FALSE)
Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.PC_ 1
Positive: VIP, ASCL1, CXCR4, RELN, MAF1, SATB2, GAD1, EMX1, KIA0319, DLX2
PAX6, CXCL12, LHX6, PROX1, SST, VLDLR
Negative: DCX, TBR1, LRP8, SCGN, COUPTF2, CXCL14, EGFR, TSHZ1, GSX2, EOMES
SP8, CALB2, NKX2.1, DCDC2, CXCR7, NCAM1
PC_ 2
Positive: MAF1, NKX2.1, DLX2, GSX2, SST, TSHZ1, PROX1, GAD1, SP8, VIP
LHX6, CXCL14, SCGN, CXCR4, VLDLR, RELN
Negative: EOMES, TBR1, KIA0319, DCDC2, CALB2, SATB2, CXCL12, DCX, LRP8, EGFR
COUPTF2, PAX6, NCAM1, ASCL1, CXCR7, EMX1
PC_ 3
Positive: RELN, SATB2, PAX6, MAF1, ASCL1, SST, SCGN, NCAM1, EMX1, GSX2
DCX, KIA0319, VIP, EGFR, CXCR7, TBR1
Negative: COUPTF2, GAD1, PROX1, DLX2, CXCR4, LRP8, LHX6, TSHZ1, CXCL14, EOMES
NKX2.1, CALB2, SP8, DCDC2, CXCL12, VLDLR
PC_ 4
Positive: NCAM1, TSHZ1, VLDLR, PROX1, CXCR7, CXCL14, DCX, EGFR, ASCL1, DCDC2
PAX6, CALB2, SCGN, GAD1, EOMES, KIA0319
Negative: LHX6, LRP8, NKX2.1, TBR1, SP8, COUPTF2, DLX2, VIP, EMX1, GSX2
RELN, SST, MAF1, CXCL12, CXCR4, SATB2
PC_ 5
Positive: SP8, GSX2, SCGN, NKX2.1, EGFR, CXCL14, COUPTF2, TSHZ1, EMX1, TBR1
PAX6, CALB2, SATB2, MAF1, NCAM1, CXCR7
Negative: CXCR4, SST, GAD1, VIP, EOMES, DLX2, DCDC2, RELN, PROX1, LHX6
VLDLR, DCX, ASCL1, CXCL12, KIA0319, LRP8
jy_408 <- FindNeighbors(jy_408, dims = 1:30)
Computing nearest neighbor graph
Computing SNN
jy_408 <- FindClusters(jy_408, resolution = 0.8)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 1190
Number of edges: 41891
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.6696
Number of communities: 7
Elapsed time: 0 seconds
jy_408 <- RunUMAP(jy_408, dims = 1:30)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session09:25:10 UMAP embedding parameters a = 0.9922 b = 1.112
09:25:10 Read 1190 rows and found 30 numeric columns
09:25:10 Using Annoy for neighbor search, n_neighbors = 30
09:25:10 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
09:25:10 Writing NN index file to temp file /tmp/Rtmpie36qI/file25503b1e881e8b
09:25:10 Searching Annoy index using 1 thread, search_k = 3000
09:25:11 Annoy recall = 100%
09:25:11 Commencing smooth kNN distance calibration using 1 thread
09:25:12 Initializing from normalized Laplacian + noise
09:25:12 Commencing optimization for 500 epochs, with 45048 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
09:25:14 Optimization finished
DimPlot(jy_408, reduction = "umap", group.by = 'seurat_clusters') + NoAxes()
jy_408.markers <- FindAllMarkers(jy_408, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
Calculating cluster 0
| | 0 % ~calculating
|++++++++++ | 20% ~00s
|++++++++++++++++++++ | 40% ~00s
|++++++++++++++++++++++++++++++ | 60% ~00s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=00s
Calculating cluster 1
| | 0 % ~calculating
|+++++ | 8 % ~00s
|+++++++++ | 17% ~00s
|+++++++++++++ | 25% ~00s
|+++++++++++++++++ | 33% ~00s
|+++++++++++++++++++++ | 42% ~00s
|+++++++++++++++++++++++++ | 50% ~00s
|++++++++++++++++++++++++++++++ | 58% ~00s
|++++++++++++++++++++++++++++++++++ | 67% ~00s
|++++++++++++++++++++++++++++++++++++++ | 75% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=00s
Calculating cluster 2
| | 0 % ~calculating
|+++++ | 8 % ~00s
|+++++++++ | 17% ~00s
|+++++++++++++ | 25% ~00s
|+++++++++++++++++ | 33% ~00s
|+++++++++++++++++++++ | 42% ~00s
|+++++++++++++++++++++++++ | 50% ~00s
|++++++++++++++++++++++++++++++ | 58% ~00s
|++++++++++++++++++++++++++++++++++ | 67% ~00s
|++++++++++++++++++++++++++++++++++++++ | 75% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=00s
Calculating cluster 3
| | 0 % ~calculating
|++++++ | 11% ~00s
|++++++++++++ | 22% ~00s
|+++++++++++++++++ | 33% ~00s
|+++++++++++++++++++++++ | 44% ~00s
|++++++++++++++++++++++++++++ | 56% ~00s
|++++++++++++++++++++++++++++++++++ | 67% ~00s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=00s
Calculating cluster 4
| | 0 % ~calculating
|+++++++ | 12% ~00s
|+++++++++++++ | 25% ~00s
|+++++++++++++++++++ | 38% ~00s
|+++++++++++++++++++++++++ | 50% ~00s
|++++++++++++++++++++++++++++++++ | 62% ~00s
|++++++++++++++++++++++++++++++++++++++ | 75% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=00s
Calculating cluster 5
| | 0 % ~calculating
|++++++++ | 14% ~00s
|+++++++++++++++ | 29% ~00s
|++++++++++++++++++++++ | 43% ~00s
|+++++++++++++++++++++++++++++ | 57% ~00s
|++++++++++++++++++++++++++++++++++++ | 71% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=00s
Calculating cluster 6
| | 0 % ~calculating
|+++++ | 8 % ~00s
|+++++++++ | 17% ~00s
|+++++++++++++ | 25% ~00s
|+++++++++++++++++ | 33% ~00s
|+++++++++++++++++++++ | 42% ~00s
|+++++++++++++++++++++++++ | 50% ~00s
|++++++++++++++++++++++++++++++ | 58% ~00s
|++++++++++++++++++++++++++++++++++ | 67% ~00s
|++++++++++++++++++++++++++++++++++++++ | 75% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=00s
jy_408.markers %>%
group_by(cluster) %>%
slice_max(n = 32, order_by = avg_log2FC)
NA
jy_408$clump = df_408$clump
DimPlot(jy_408, reduction = "umap", group.by = 'clump') + NoAxes() + NoLegend()
hcoords = df_408 %>% dplyr::select(c('X_horz', 'Y_horz')) %>% as.matrix()
colnames(hcoords) <- c('pixel_1', 'pixel_2')
jy_408[["H"]] <- CreateDimReducObject(embeddings = hcoords, key = "pixel_", assay = DefaultAssay(jy_408))
## Slice 164: same load/clean/embed/Seurat pipeline as slice 408.
df_164 <- load_slice(slice = "164") %>%
  clean_164() %>%
  embed_horizontal_164()
rownames(df_164) <- seq_len(nrow(df_164))

## This slice drops raw X/Y (not X_horz/Y_horz) before transposing.
expr_164 <- df_164 %>%
  dplyr::select(-c(area, IMAGE.NAME, X, Y, clump)) %>%
  t()
jy_164 <- CreateSeuratObject(expr_164)

jy_164$image <- df_164$IMAGE.NAME
jy_164 <- NormalizeData(jy_164, scale.factor = 1e5)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
## Zero out weak signal (< 3) in the slice-164 normalized matrix.
normed = GetAssayData(jy_164, slot = 'data')
normed[normed < 3] = 0
## NOTE(review): as in the 408 block above, the per-gene median is
## computed but never used — the `< mdn_gene_expr` zeroing present in the
## merged pipeline is missing here. TODO confirm intent.
for(gene_name in rownames(jy_164)) {
mdn_gene_expr = median(normed[gene_name, normed[gene_name, ] > 0])
}
jy_164 <- SetAssayData(jy_164, slot = 'data', normed)
jy_164 <- FindVariableFeatures(jy_164, selection.method = "vst")
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
all.genes <- rownames(jy_164)
jy_164 <- ScaleData(jy_164, features = all.genes)
Centering and scaling data matrix
|
| | 0%
|
|=============================================================================================| 100%
jy_164 <- RunPCA(jy_164, approx = FALSE)
Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.PC_ 1
Positive: NKX2.1, MAF1, SCGN, PROX1, GAD1, SP8, VIP, LHX6, GSX2, DCX
TSHZ1, CXCR7, COUPTF2, SST, DLX2, LRP8
Negative: KIA0319, SATB2, ASCL1, PAX6, CALB2, DCDC2, RELN, NCAM1, CXCL12, TBR1
EMX1, EGFR, CXCL14, VLDLR, CXCR4, EOMES
PC_ 2
Positive: DCX, SCGN, SP8, TBR1, EGFR, SATB2, DCDC2, EOMES, NKX2.1, LRP8
KIA0319, NCAM1, EMX1, CXCR7, TSHZ1, CALB2
Negative: GAD1, VIP, CXCR4, LHX6, MAF1, DLX2, SST, RELN, CXCL12, CXCL14
COUPTF2, PROX1, PAX6, GSX2, ASCL1, VLDLR
PC_ 3
Positive: COUPTF2, SP8, PROX1, SCGN, EOMES, TBR1, MAF1, CXCR7, NKX2.1, GSX2
TSHZ1, LHX6, DCDC2, EGFR, VIP, LRP8
Negative: SST, ASCL1, PAX6, RELN, CALB2, CXCL14, EMX1, CXCL12, DLX2, NCAM1
GAD1, CXCR4, KIA0319, DCX, VLDLR, SATB2
PC_ 4
Positive: TSHZ1, PROX1, SP8, ASCL1, DLX2, VLDLR, NKX2.1, NCAM1, PAX6, CXCR7
SCGN, EMX1, RELN, CXCL14, VIP, KIA0319
Negative: LRP8, TBR1, EOMES, CALB2, LHX6, GSX2, COUPTF2, DCX, MAF1, GAD1
CXCL12, DCDC2, SATB2, EGFR, CXCR4, SST
PC_ 5
Positive: PAX6, COUPTF2, RELN, TSHZ1, DLX2, SP8, CXCL12, SCGN, TBR1, GSX2
LRP8, DCX, SATB2, LHX6, ASCL1, GAD1
Negative: NKX2.1, DCDC2, EOMES, VLDLR, NCAM1, SST, CXCR7, EMX1, CXCR4, EGFR
MAF1, KIA0319, VIP, PROX1, CXCL14, CALB2
jy_164 <- FindNeighbors(jy_164, dims = 1:30)
Computing nearest neighbor graph
Computing SNN
jy_164 <- FindClusters(jy_164, resolution = 0.8)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 802
Number of edges: 30100
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.6519
Number of communities: 6
Elapsed time: 0 seconds
jy_164 <- RunUMAP(jy_164, dims = 1:30)
09:25:18 UMAP embedding parameters a = 0.9922 b = 1.112
09:25:18 Read 802 rows and found 30 numeric columns
09:25:18 Using Annoy for neighbor search, n_neighbors = 30
09:25:18 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
09:25:18 Writing NN index file to temp file /tmp/Rtmpie36qI/file25503b737aeae1
09:25:18 Searching Annoy index using 1 thread, search_k = 3000
09:25:18 Annoy recall = 100%
09:25:19 Commencing smooth kNN distance calibration using 1 thread
09:25:20 Initializing from normalized Laplacian + noise
09:25:20 Commencing optimization for 500 epochs, with 30056 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
09:25:21 Optimization finished
jy_164$clump <- df_164$clump
DimPlot(jy_164, reduction = "umap", group.by = 'seurat_clusters') + NoAxes()
DimPlot(jy_164, reduction = "umap", group.by = 'clump') + NoAxes() + NoLegend()
unique(df_164$IMAGE.NAME)
[1] "CC_8" "CC_10" "CC_Cortical1" "CC_Cortical2" "CC_L2-1" "CC_L2-2"
[7] "CC_L2-3" "CC_2" "CC_3" "CC_4" "CC_5" "CC_6"
[13] "CC_7" "CC_9" "TC_1" "TC_2" "TC_3" "TC_4"
[19] "TC_5" "TC_6" "TC_7" "TC_8" "TC_9" "TC_10"
[25] "TC_Cortical1" "TC_Cortical2" "TC_Cortical3"
## Anatomical ordering of the 164 images along the linearized axis.
images_ordered <- c(
  'TC_Cortical3', 'TC_Cortical2', 'TC_Cortical1', 'TC_10', 'TC_9', 'TC_8',
  'TC_7', 'TC_6', 'TC_5', 'TC_4', 'TC_3', 'TC_2', 'TC_1', 'CC_2', 'CC_3',
  'CC_4', 'CC_5', 'CC_6', 'CC_7', 'CC_9', 'CC_8', 'CC_10',
  'CC_L2-1', 'CC_L2-2', 'CC_L2-3', 'CC_Cortical1', 'CC_Cortical2'
)
## 35-unit offset per image along the axis; zero offset orthogonally.
x_horz <- 35 * seq_along(images_ordered)
y_horz <- rep(0, length(images_ordered))
horz_embedding <- data.frame()
## Sentinel coordinates, overwritten image-by-image in the loop below.
df_164$X_horz <- -1
df_164$Y_horz <- -1
## Map every annotated cell of slice 164 into the shared "horizontal"
## coordinate space by offsetting each image's pixel coordinates along
## the ordered axis defined above.
images = list.files(meta_dir)
for(i in 1:length(images_ordered)){
image_name = images_ordered[i]
print(image_name)
## Image names look like '<CORTEX>_<NUMBER>' (e.g. 'TC_10').
split_names = strsplit(image_name, '_')
cortex = toupper(split_names[[1]][1])
number = split_names[[1]][2]
number_csv = paste0('_', number, '.csv')
## Locate this image's overlay CSV for slice 164 by name fragments.
filename = images[grepl(cortex, images) & grepl(number_csv, images) & grepl('164', images)]
coordinates = read.table(file.path(meta_dir, filename), sep = ',', header = TRUE)
## Row 38 of CC_L2-1 is dropped — presumably a known bad annotation;
## TODO confirm with the annotation source.
if(image_name == "CC_L2-1"){
coordinates = coordinates[c(1:37, 39:nrow(coordinates)), ]
}
## checked already that lists are equal, missing 1, 18, 19 for now, layer 1 and others
## so this is a little tricky, so need to get it right
## Remember, it is the top right that the coordinate is coming from, but
## the bottom right is the new coordinate space.
## so first when we get the original coordinate space, to set to relative
## of bottom would be the same X, but 1024 - Y
## push out the coordinates for better visualization
#x_repelled <- (512 - coordinates$X_Coordinate_In_pixels)
## NOTE(review): X_horz receives the y_horz offset and Y_horz the x_horz
## offset — this looks like a deliberate 90-degree re-orientation of the
## strip, but verify it is not a transposition bug. IMAGE_SIZE and
## IMAGE_LEN are assumed to be defined elsewhere in the session.
df_164[df_164$IMAGE.NAME == image_name, 'X_horz'] = (coordinates$X_Coordinate_In_pixels /
IMAGE_SIZE * IMAGE_LEN) + y_horz[i]
## Pixel Y grows downward in 1024-high images, so flip before scaling.
df_164[df_164$IMAGE.NAME == image_name, 'Y_horz'] = ((1024-coordinates$Y_Coordinate_In_pixels) /
IMAGE_SIZE * IMAGE_LEN) + x_horz[i]
}
[1] "TC_Cortical3"
[1] "TC_Cortical2"
[1] "TC_Cortical1"
[1] "TC_10"
[1] "TC_9"
[1] "TC_8"
[1] "TC_7"
[1] "TC_6"
[1] "TC_5"
[1] "TC_4"
[1] "TC_3"
[1] "TC_2"
[1] "TC_1"
[1] "CC_2"
[1] "CC_3"
[1] "CC_4"
[1] "CC_5"
[1] "CC_6"
[1] "CC_7"
[1] "CC_9"
[1] "CC_8"
[1] "CC_10"
[1] "CC_L2-1"
[1] "CC_L2-2"
[1] "CC_L2-3"
[1] "CC_Cortical1"
[1] "CC_Cortical2"
## Register the horizontal embedding as a custom DimReduc ("H") on jy_164.
hcoords <- as.matrix(dplyr::select(df_164, c('X_horz', 'Y_horz')))
colnames(hcoords) <- c('pixel_1', 'pixel_2')
jy_164[["H"]] <- CreateDimReducObject(
  embeddings = hcoords,
  key = "pixel_",
  assay = DefaultAssay(jy_164)
)

## Prefix cell names with their slice ID so the merged object is unambiguous.
jy_164 <- RenameCells(jy_164, paste0('164_', seq_len(ncol(jy_164))))
jy_164$area <- df_164$area
jy_408 <- RenameCells(jy_408, paste0('408_', seq_len(ncol(jy_408))))
jy_408$area <- df_408$area

## Combine both slices into a single object for joint analysis.
jy_all <- merge(jy_164, jy_408)
jy_all <- NormalizeData(jy_all, scale.factor = 1e5) ###
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
## Denoise the merged log-normalized matrix: zero all values below 3,
## then apply a per-gene floor — the median of the gene's non-zero values,
## except a 40th-percentile floor for COUPTF2/SP8 and a fixed 0.5 for DCX.
normed <- GetAssayData(jy_all, slot = 'data')
normed[normed < 3] <- 0
for (gene in rownames(jy_all)) {
  nonzero <- normed[gene, normed[gene, ] > 0]
  if (gene == 'DCX') {
    threshold <- 0.5
    print('skip dcx')
  } else if (gene %in% c('COUPTF2', 'SP8')) {
    threshold <- quantile(nonzero, .40)
  } else {
    threshold <- median(nonzero)
  }
  ## Zero everything under the gene-specific floor.
  normed[gene, normed[gene, ] < threshold] <- 0
}
jy_all <- SetAssayData(jy_all, slot = 'data', normed)
jy_all <- FindVariableFeatures(jy_all, selection.method = "vst")
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
all.genes <- rownames(jy_all)
jy_all <- ScaleData(jy_all, features = all.genes)
Centering and scaling data matrix
|
| | 0%
|
|=============================================================================================| 100%
jy_all <- RunPCA(jy_all, approx = FALSE)
Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.PC_ 1
Positive: DCX, KIA0319, TBR1, SATB2, EOMES, DCDC2, LRP8, EGFR, CALB2, NCAM1
VLDLR, ASCL1, PAX6, CXCL12, SCGN, COUPTF2
Negative: GAD1, VIP, LHX6, MAF1, CXCR4, SST, NKX2.1, DLX2, PROX1, TSHZ1
SP8, GSX2, RELN, CXCL14, EMX1, CXCR7
PC_ 2
Positive: SCGN, TSHZ1, DCX, NKX2.1, SP8, CXCR7, PROX1, GSX2, COUPTF2, TBR1
LRP8, VLDLR, CXCL14, EGFR, MAF1, NCAM1
Negative: ASCL1, KIA0319, CXCR4, SATB2, CALB2, CXCL12, VIP, RELN, PAX6, GAD1
DLX2, DCDC2, SST, LHX6, EOMES, EMX1
PC_ 3
Positive: NCAM1, TSHZ1, MAF1, VLDLR, PROX1, ASCL1, CXCR7, RELN, GSX2, CXCL14
PAX6, EGFR, EMX1, CXCR4, SP8, DCDC2
Negative: CALB2, TBR1, CXCL12, LRP8, DCX, SCGN, SST, DLX2, COUPTF2, GAD1
LHX6, SATB2, EOMES, VIP, KIA0319, NKX2.1
PC_ 4
Positive: GAD1, LRP8, TBR1, VIP, EGFR, LHX6, CXCR4, DCDC2, NCAM1, EOMES
DCX, CXCL14, CXCR7, SST, MAF1, TSHZ1
Negative: SP8, DLX2, PAX6, SCGN, CALB2, NKX2.1, EMX1, ASCL1, COUPTF2, SATB2
CXCL12, KIA0319, GSX2, PROX1, VLDLR, RELN
PC_ 5
Positive: TBR1, LHX6, LRP8, RELN, SP8, PAX6, COUPTF2, GSX2, SST, ASCL1
EGFR, EMX1, MAF1, DLX2, DCX, NKX2.1
Negative: CALB2, NCAM1, CXCL12, VLDLR, TSHZ1, PROX1, CXCR7, CXCL14, GAD1, DCDC2
KIA0319, CXCR4, SATB2, VIP, SCGN, EOMES
jy_all <- FindNeighbors(jy_all, dims = 1:30)
Computing nearest neighbor graph
Computing SNN
jy_all <- FindClusters(jy_all, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 1992
Number of edges: 66856
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8190
Number of communities: 7
Elapsed time: 0 seconds
jy_all <- RunUMAP(jy_all, dims = 1:30)
09:25:25 UMAP embedding parameters a = 0.9922 b = 1.112
09:25:25 Read 1992 rows and found 30 numeric columns
09:25:25 Using Annoy for neighbor search, n_neighbors = 30
09:25:25 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
09:25:25 Writing NN index file to temp file /tmp/Rtmpie36qI/file25503b4a9beeaf
09:25:25 Searching Annoy index using 1 thread, search_k = 3000
09:25:26 Annoy recall = 100%
09:25:26 Commencing smooth kNN distance calibration using 1 thread
09:25:27 Initializing from normalized Laplacian + noise
09:25:27 Commencing optimization for 500 epochs, with 77712 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
09:25:31 Optimization finished
## UMAP of the merged object colored by Louvain cluster.
DimPlot(jy_all, reduction = "umap", group.by = 'seurat_clusters') + NoAxes()
## Same embedding colored by the anatomical-area metadata.
DimPlot(jy_all, reduction = "umap", group.by = 'area') + NoAxes() # NoLegend()
## Blended co-expression of CXCL12/CXCR4 restricted to GAD1-positive cells.
FeaturePlot(jy_all, c('CXCL12', 'CXCR4'), cells = which(FetchData(jy_all, 'GAD1') > 0), cols = c( '#F18F01', '#048BA8'), blend= TRUE)
Warning: Only two colors provided, assuming specified are for features and agumenting with 'lightgrey' for double-negatives
## Human-readable identities for the seven Louvain clusters, matched to
## cluster levels by position.
new.cluster.ids <- setNames(
  c('CGE/LGE', 'Ex', 'TBR1+ CGE', 'CALB2+DLX2+', 'VIP+GAD1+',
    'SST+LHX6+', 'MGE'),
  levels(jy_all)
)
jy_all <- RenameIdents(jy_all, new.cluster.ids)
## Need to extract as cell x position matrix (X, Y)
## Let's start with 5 neighbors
XY_164 = Embeddings(jy_164, 'H')
neighbors = 5
## Directed k-nearest-neighbor graph over the horizontal coordinates.
nn_graph = nng(XY_164, k = neighbors)
#XY_408 = Embeddings(jy_408, 'H')

## For every cell, compare the average spatial distance to its same-type
## neighbors vs its other-type neighbors. Preallocated instead of grown
## with c() — one slot per cell.
avg_same = numeric(ncol(jy_164))
avg_othr = numeric(ncol(jy_164))
for (i in seq_len(ncol(jy_164))) {
  ## Class identity of cell i.
  class_cell = Idents(jy_164)[i]
  ## Its k nearest neighbors in the spatial graph.
  nearest_neighbors = as.matrix(nn_graph[[i]])
  neighbor_idents = Idents(jy_164)[nearest_neighbors[[1]]]
  same_ixs = which(neighbor_idents == class_cell)
  othr_ixs = which(neighbor_idents != class_cell)
  ## dist() on (cell i, selected neighbors): the first k entries of the
  ## dist object are the distances from cell i to each neighbor.
  ## seq_along() (not 1:length()) yields an empty index when a cell has
  ## no neighbors of that kind, giving NaN rather than a spurious value.
  same_obj = dist(XY_164[c(i, nearest_neighbors[[1]][same_ixs]), ])
  avg_same[i] = mean(same_obj[seq_along(same_ixs)])
  othr_obj = dist(XY_164[c(i, nearest_neighbors[[1]][othr_ixs]), ])
  ## BUG FIX: the original averaged same_obj here, so avg_othr never
  ## actually used the other-type distances.
  avg_othr[i] = mean(othr_obj[seq_along(othr_ixs)])
}
tc_csv = read_csv('/home/aunoy/st/arc_profiling/st_analysis/hand_annotated_data/clump/164_TC_3_1_assignments.csv', col_names = FALSE)
Rows: 13 Columns: 2── Column specification ──────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): X1, X2
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
tc_csv[[1, 2]]  # extract the single assignment string (`.value` was Python syntax, not R)
pre_str
[1] "[u'4', u'6']"
str1 <- gsub('\\]', '', gsub('\\[', '', pre_str))
str2 <- gsub('u', '', gsub("'", '', str1))
## Load from
clumps_dir <- '/home/aunoy/st/arc_profiling/st_analysis/hand_annotated_data/clump/clump_data/'

## One row per (clump image, pixel extension): the number of clusters found.
clump_num_df <- data.frame(
  img = character(0),
  pixel_ext = integer(0),
  clust_num = integer(0)
)

prev_clump <- 'temp'
tic()
for (file_name in list.files(clumps_dir)) {
  ## Cell-assignment files share this folder; only count the cluster files.
  if (grepl('assignment', file_name)) {
    next
  }
  ## File names follow '<slice>_<cortex>_<image>_<pixel_ext>.csv'.
  name_parts <- strsplit(file_name, '_')[[1]]
  clump_name <- paste(name_parts[1:3], collapse = "_")
  pixel_ext <- as.integer(strsplit(name_parts[4], '[.]')[[1]][1])
  ## Load the per-clump cluster table.
  clump_csv <- read_csv(file.path(clumps_dir, file_name))
  ## Print each clump name the first time it is seen, as progress output.
  if (prev_clump != clump_name) {
    prev_clump <- clump_name
    print(clump_name)
  }
  clump_num_df <- clump_num_df %>%
    add_row(img = clump_name, pixel_ext = pixel_ext, clust_num = nrow(clump_csv))
}
[1] "164_CC_10"
[1] "164_CC_2"
[1] "164_CC_3"
[1] "164_CC_4"
[1] "164_CC_5"
[1] "164_CC_6"
[1] "164_CC_7"
[1] "164_CC_8"
[1] "164_CC_9"
[1] "164_CC_Cortical1"
[1] "164_CC_Cortical2"
[1] "164_CC_L2-1"
[1] "164_CC_L2-2"
[1] "164_CC_L2-3"
[1] "164_TC_1"
[1] "164_TC_10"
[1] "164_TC_2"
[1] "164_TC_3"
[1] "164_TC_4"
[1] "164_TC_5"
[1] "164_TC_6"
[1] "164_TC_7"
[1] "164_TC_8"
[1] "164_TC_9"
[1] "164_TC_Cortical1"
[1] "164_TC_Cortical2"
[1] "164_TC_Cortical3"
[1] "408_CC_10"
[1] "408_CC_11"
[1] "408_CC_12"
[1] "408_CC_4"
[1] "408_CC_5"
[1] "408_CC_6"
[1] "408_CC_7"
[1] "408_CC_8"
[1] "408_CC_9"
[1] "408_Cortical_1"
[1] "408_Cortical_2"
[1] "408_L2_1"
[1] "408_L2_2"
[1] "408_L2_3"
[1] "408_TC_10"
[1] "408_TC_11"
[1] "408_TC_12"
[1] "408_TC_13"
[1] "408_TC_14"
[1] "408_TC_15"
[1] "408_TC_16"
[1] "408_TC_17"
[1] "408_TC_2"
[1] "408_TC_20"
[1] "408_TC_3"
[1] "408_TC_4"
[1] "408_TC_5"
[1] "408_TC_6"
[1] "408_TC_7"
[1] "408_TC_8"
[1] "408_TC_9"
toc()
795.368 sec elapsed
## 1644 seconds it taeks, so about half an hour. This is worth saving lol
nrow(bro_test)
[1] 17
## Get the order
## Order images by their cluster count at pixel extension 1, then plot the
## cluster-count curves for every pixel extension using that x ordering.
samp <- clump_num_df %>%
  filter(pixel_ext == 1) %>%
  arrange(clust_num)
## Renamed from `order` — that name masked base::order().
img_order <- samp$img
clump_num_df %>%
  #filter(pixel_ext <= 10) %>%
  arrange(clust_num) %>%
  ggplot(aes(x = img, y = clust_num, group = factor(pixel_ext))) +
  geom_line(aes(color = factor(pixel_ext))) +
  geom_point(aes(color = factor(pixel_ext))) +
  scale_x_discrete(limits = img_order) +
  theme_classic() +
  RotatedAxis()
#' Parse the numeric cell indices out of a stringified Python list such as
#' "[u'4', u'6']" (as emitted by the upstream clump-assignment script).
#'
#' @param assignment_str Character scalar, e.g. "[u'4', u'6']" or "[]".
#' @return Numeric vector of indices; numeric(0) for an empty list.
get_numeric_assignments <- function(assignment_str){
  ## Extract signed numeric tokens directly. More robust than the old
  ## strip-brackets/quotes/'u' approach, which deleted EVERY 'u' in the
  ## string, not just the Python unicode prefixes.
  matches <- regmatches(
    assignment_str,
    gregexpr("-?[0-9]+(\\.[0-9]+)?", assignment_str)
  )[[1]]
  as.numeric(matches)
}
#' Given a clump's assignment string and its image name, return the subset
#' of jy_all containing the assigned cells, or the numeric sentinel -1
#' when the clump has no assigned cells (callers test the return class).
#'
#' Relies on the globals df_164/df_408, jy_164/jy_408 and jy_all.
get_assignments <- function(assignment_str, image_name){
### breakdown
assignment_list <- get_numeric_assignments(assignment_str)
if(length(assignment_list) == 0){
return(-1)
}
## Select the slice-specific data frame / Seurat object by slice ID.
if(grepl('164', image_name)){
sdf <- df_164
sobj <- jy_164
}
else{
sdf <- df_408
sobj <- jy_408
}
## Strip the leading '<slice>_' prefix to match IMAGE.NAME values.
## NOTE(review): '..._' is a regex (any 3 chars + '_') applied globally;
## it works for the names produced upstream but would over-match on names
## with later '_'-separated 3-char runs — verify against all image names.
img_name_abbr = gsub('..._', '', image_name)
cell_ixs <- which(sdf$IMAGE.NAME == img_name_abbr)
image_sobj <- sobj[, cell_ixs]
## Assignment indices are positions within this image's cells.
cell_names <- colnames(image_sobj[, assignment_list])
## just return the names
## Subset the merged object by those (slice-prefixed) cell names.
cells <- jy_all[, which(colnames(jy_all) %in% cell_names)]
## Will we get the same response each time?
return(cells)
}
### So I am going to make a clump DF and also going to update jy_all
## Load from
clumps_dir = '/home/aunoy/st/arc_profiling/st_analysis/hand_annotated_data/clump/clump_data/'
## Only the pixel-extension-1 files are processed in this pass.
pixel_ext = '1'
## Morphology columns to keep from each clump CSV ("...1" is the unnamed
## first column holding the clump identifier).
features = c("...1","Area", "Perim.", "Major", "Minor", "Angle",
"Circ.", "Feret", "%Area","MinFeret", "AR",
"Round", "Solidity" )
#jy_all$clump = "NaN"
clump_df = data.frame()
tic()
#[501:length(list.files(clumps_dir))]
for(file_name in list.files(clumps_dir)){
### If the file is an assignment file, skip
## NOTE(review): the '.' in '_1.csv' is a regex any-char here; it matches
## the intended names but is not a literal-dot match — confirm acceptable.
if(!grepl(paste0('_', pixel_ext, '.csv'), file_name) || grepl('assignments', file_name)){ #|| grepl('164', file_name)){
next
}
print(file_name)
### We need to isolate the name, pixel_ext, # of clusters
split_name = strsplit(file_name, '_')
clump_name = paste(split_name[[1]][1:3], collapse = "_")
assignment_name = paste0(clump_name, '_', pixel_ext, '_assignments.csv')
#pixel_ext = as.integer(strsplit(split_name[[1]][4], '[.]')[[1]][1])
### Load the file
## Normalize 408-style names to the IMAGE.NAME conventions used upstream.
clump_name = gsub('Cortical_', 'Cortical', clump_name)
clump_name = gsub('L2_', 'L2-', clump_name)
clump_csv <- read_csv(file.path(clumps_dir, file_name))
clump_csv$...1 <- as.character(clump_csv$...1)
assignments_csv <- read_csv(file.path(clumps_dir, assignment_name), col_names = FALSE)
## Per-clump accumulators: cell-type counts (temp) and mean expression
## per gene (gtemp), one row per clump in this file.
temp <- data.frame()
gtemp <- data.frame()
ctype_names <- levels(Idents(jy_all))
for(i in 1:nrow(clump_csv)){
itr_name = paste0(clump_name, '_Clump_', i)
clump_csv[i, 1] = itr_name
assignment_str = assignments_csv[which(assignments_csv$X1 == itr_name), 2][[1]]
cells = get_assignments(assignment_str, clump_name)
ctypes <- matrix(0, 1, length(unique(Idents(jy_all))))
clump_expr <- matrix(NaN, 1, length(rownames(jy_all)))
## get_assignments returns -1 (numeric) when the clump has no cells.
## NOTE(review): prefer inherits()/is.numeric() over class() comparison.
if(class(cells) != "numeric"){
## Tally each assigned cell into its cell-type bucket.
for(j in 1:ncol(cells)) {
ctype = which(ctype_names == Idents(cells)[[j]])
ctypes[ctype] = ctypes[ctype] + 1
}
clump_expr <- rowMeans(as.matrix(GetAssayData(cells, 'data')))
## Tag the member cells of this clump on the merged object.
jy_all$clump[which(colnames(jy_all) %in% colnames(cells))] = clump_name
}
else{
## NOTE(review): if the first clump of a file is empty, gtemp has no
## columns yet, so this assigns NULL colnames (a silent no-op) — the
## NaN row is aligned only by position. Verify this is acceptable.
colnames(clump_expr) <- colnames(gtemp)
}
temp <- rbind(temp, ctypes)
### This is to get average expression in clump
gtemp <- rbind(gtemp, clump_expr)
}
colnames(temp) <- ctype_names
colnames(gtemp) <- rownames(jy_all)
## One combined row block per file: morphology + type counts + mean expr.
clump_csv_to_add <- clump_csv %>% dplyr::select(features)
clump_df <- rbind(clump_df, cbind(clump_csv_to_add, temp, gtemp))
#print(assignment_name)
#df_408 <- rbind(df_408, df_to_append)
### Gotta make sure that the image names are the same, there are some discrepancies
}
[1] "164_CC_10_1.csv"
[1] "164_CC_2_1.csv"
[1] "164_CC_3_1.csv"
[1] "164_CC_4_1.csv"
[1] "164_CC_5_1.csv"
[1] "164_CC_6_1.csv"
[1] "164_CC_7_1.csv"
[1] "164_CC_8_1.csv"
[1] "164_CC_9_1.csv"
[1] "164_CC_Cortical1_1.csv"
[1] "164_CC_Cortical2_1.csv"
[1] "164_CC_L2-1_1.csv"
[1] "164_CC_L2-2_1.csv"
[1] "164_CC_L2-3_1.csv"
[1] "164_TC_1_1.csv"
[1] "164_TC_10_1.csv"
[1] "164_TC_2_1.csv"
[1] "164_TC_3_1.csv"
[1] "164_TC_4_1.csv"
[1] "164_TC_5_1.csv"
[1] "164_TC_6_1.csv"
[1] "164_TC_7_1.csv"
[1] "164_TC_8_1.csv"
[1] "164_TC_9_1.csv"
[1] "164_TC_Cortical1_1.csv"
[1] "164_TC_Cortical2_1.csv"
[1] "164_TC_Cortical3_1.csv"
[1] "408_CC_10_1.csv"
[1] "408_CC_11_1.csv"
[1] "408_CC_12_1.csv"
[1] "408_CC_4_1.csv"
[1] "408_CC_5_1.csv"
[1] "408_CC_6_1.csv"
[1] "408_CC_7_1.csv"
[1] "408_CC_8_1.csv"
[1] "408_CC_9_1.csv"
[1] "408_Cortical_1_1.csv"
[1] "408_Cortical_2_1.csv"
[1] "408_L2_1_1.csv"
[1] "408_L2_2_1.csv"
[1] "408_L2_3_1.csv"
[1] "408_TC_10_1.csv"
[1] "408_TC_11_1.csv"
[1] "408_TC_12_1.csv"
[1] "408_TC_13_1.csv"
[1] "408_TC_14_1.csv"
[1] "408_TC_15_1.csv"
[1] "408_TC_16_1.csv"
[1] "408_TC_17_1.csv"
[1] "408_TC_2_1.csv"
[1] "408_TC_20_1.csv"
[1] "408_TC_3_1.csv"
[1] "408_TC_4_1.csv"
[1] "408_TC_5_1.csv"
[1] "408_TC_6_1.csv"
[1] "408_TC_7_1.csv"
[1] "408_TC_8_1.csv"
[1] "408_TC_9_1.csv"
## Save this laboriously constructed data frame
saveRDS(clump_df, file.path('/home/aunoy/st/arc_profiling/st_analysis/hand_annotated_data/clump', 'clump_df.rds'))
toc()
198.761 sec elapsed
clump_df_408 <- readRDS(file.path('/home/aunoy/st/arc_profiling/st_analysis/hand_annotated_data/clump', 'clump_df_408.rds'))
Temporary: merge the per-slice clump data frames into a single clump_df.
clump_df <- rbind(clump_df_164, clump_df_408)
Error in rbind(deparse.level, ...) : object 'clump_df_408' not found
saveRDS(clump_df, file.path('/home/aunoy/st/arc_profiling/st_analysis/hand_annotated_data/clump', 'clump_df.rds'))
readRDS(clump_df_164, file.path('/home/aunoy/st/arc_profiling/st_analysis/hand_annotated_data/clump', 'clump_df_164.rds'))
Error in readRDS(clump_df_164, file.path("/home/aunoy/st/arc_profiling/st_analysis/hand_annotated_data/clump", :
object 'clump_df_164' not found
clump_df <- rbind(clump_df_164, clump_df)
clump_df <- readRDS(file.path('/home/aunoy/st/arc_profiling/st_analysis/hand_annotated_data/clump', 'clump_df.rds'))
clump_df %>%
ggplot(aes(x = Area, y = Perim.)) + geom_point()
clump_pca <- prcomp(clump_df[, 14:20], center = TRUE,scale. = TRUE)
summary(clump_pca)
library(ggfortify)
autoplot(clump_pca)
autoplot(clump_pca, data = clump_df, colour = 'stream')
# Derive each clump's stream-of-origin label from the clump name stored in
# the `...1` column. Patterns are applied in order, so the more specific
# Cortical / L2 labels overwrite the generic 408_*/164_* matches.
clump_df$stream <- "wrong"  # sentinel: flags names matching no pattern
stream_labels <- c(
  "408_TC"          = "408_TC",
  "164_TC"          = "164_TC",
  "408_CC"          = "408_CC",
  "164_CC"          = "164_CC",
  "408_Cortical"    = "PCort",
  "164_TC_Cortical" = "ATCort",
  "164_CC_Cortical" = "ACCort",
  "408_L2"          = "PL2",
  "164_CC_L2"       = "AL2"
)
for (k in seq_along(stream_labels)) {
  hits <- grepl(names(stream_labels)[k], clump_df$...1)
  clump_df$stream[hits] <- stream_labels[[k]]
}
clump_df %>%
ggplot(aes(x = stream, y = Area)) + geom_violin()
asummary()
clump_df %>% group_by(stream) %>%
summarize()
clump_df %>%
ggplot(aes(x = stream, y = Area)) + geom_violin()
clump_df$img <- str_split_fixed(clump_df$...1, "_Clump", 2)[, 1]
clump_df %>%
ggplot(aes(x = stream, y = Area, color = 'TBR1+ CGE')) + geom_point(size = clump_df$Area / 10e4, ) + RotatedAxis()
subset_df <- clump_df %>%
filter(grepl('_', stream))
clump_df %>%
filter(grepl('_', stream)) %>%
ggplot(aes(x = stream, y = ncell)) + geom_boxplot() + geom_point(size = subset_df$Area / 10e4, ) + RotatedAxis() + ylab('# DCX cells in clump')
clump_df %>%
filter(grepl('_', stream)) %>%
ggplot(aes(x = stream, y = Area)) + geom_boxplot() + RotatedAxis()
library(umap)
clump_umap <- umap(clump_pca$x)
umap_cds <- as.data.frame(clump_umap$layout)
colnames(umap_cds) <- c('umap1', 'umap2')
clump_df$umap1 <- umap_cds$umap1
clump_df$umap2 <- umap_cds$umap2
clump_df %>%
filter(umap2 < 10) %>%
arrange(CALB2_pct) %>%
ggplot(aes(x = umap1, y = umap2, color = CALB2_pct)) + geom_point(size = 0.8, alpha = 0.8)
clump_df %>%
#filter(grepl('_', stream)) %>%
ggplot(aes(x = umap1, y = umap2, color = stream)) + geom_point(size = 0.8, alpha = 0.5)
# Total cells per clump; columns 14:20 are assumed to be the seven
# per-cell-type count columns appended during aggregation -- TODO confirm.
clump_df$ncell <- rowSums(clump_df[, 14:20])
# Per-type composition fractions (count / total cells in clump).
clump_df$MGE_pct <- clump_df$MGE / clump_df$ncell
clump_df$TBR1_pct <- clump_df$`TBR1+ CGE` / clump_df$ncell
clump_df$Ex_pct <- clump_df$Ex / clump_df$ncell
clump_df$SST_pct <- clump_df$`SST+LHX6+` / clump_df$ncell
clump_df$CALB2_pct <- clump_df$`CALB2+DLX2+` / clump_df$ncell
clump_df$VIP_pct <- clump_df$`VIP+GAD1+` / clump_df$ncell
clump_df$CGE_pct <- clump_df$`CGE/LGE` / clump_df$ncell
# Inspect the isolated UMAP cluster (umap2 > 10) seen in the scatter plots.
weird_cluster <- clump_df %>%
filter(umap2 > 10)
weird_cluster
# NOTE(review): the seven assignments below repeat the ones above verbatim
# (purl/transcript artifact); recomputing them is idempotent.
clump_df$MGE_pct <- clump_df$MGE / clump_df$ncell
clump_df$TBR1_pct <- clump_df$`TBR1+ CGE` / clump_df$ncell
clump_df$Ex_pct <- clump_df$Ex / clump_df$ncell
clump_df$SST_pct <- clump_df$`SST+LHX6+` / clump_df$ncell
clump_df$CALB2_pct <- clump_df$`CALB2+DLX2+` / clump_df$ncell
clump_df$VIP_pct <- clump_df$`VIP+GAD1+` / clump_df$ncell
clump_df$CGE_pct <- clump_df$`CGE/LGE` / clump_df$ncell
#clump_df$order = hclust_avg$order
bplot_df <- clump_df[hclust_avg$order, c(1, 21,25, 26:32)] %>%
filter(MGE_pct != 'NaN') %>%
pivot_longer(!c(...1, stream, ncell), names_to = "pct_type", values_to = "pct")
bplot_df %>%
filter(ncell > 5) %>%
#filter(grepl('164', stream)) %>%
ggplot(aes(x = ...1, y = pct, fill = pct_type)) +
geom_bar(position="stack", stat="identity") +
theme(axis.text.x=element_blank()) + xlab('clump') #remove x axis labels) + xl
dist_mat <- dist(clump_pca$x, method = 'euclidean')
hclust_avg <- hclust(dist_mat, method = 'average')
plot(hclust_avg)
## For every cell in slice 164, compare the average spatial distance to its
## k nearest neighbors of the SAME type vs. neighbors of a DIFFERENT type.
avg_same = c()
avg_othr = c()
## BUG FIX: the loop previously ran over 1:ncol(clump_df); the body indexes
## the cells of jy_164, so ncol(jy_164) is the correct bound.
for(i in seq_len(ncol(jy_164))){
## Get the class identity of the cell
class_cell = Idents(jy_164)[i]
## Get this cell's neighbor indices from the knn graph
nearest_neighbors = as.matrix(nn_graph[[i]])
neighbor_idents = Idents(jy_164)[nearest_neighbors[[1]]]
same_ixs = which(neighbor_idents == class_cell)
othr_ixs = which(neighbor_idents != class_cell)
## dist() of (cell, neighbors): the first length(ixs) entries of the
## distance vector are the distances from the cell itself.
## seq_along() also fixes the 1:0 indexing when a group is empty.
same_obj = dist(XY_164[c(i, nearest_neighbors[[1]][same_ixs]), ])
avg_same = c(avg_same, mean(same_obj[seq_along(same_ixs)]))
othr_obj = dist(XY_164[c(i, nearest_neighbors[[1]][othr_ixs]), ])
## BUG FIX: previously averaged same_obj here, so avg_othr duplicated the
## same-type distances instead of using the other-type ones.
avg_othr = c(avg_othr, mean(othr_obj[seq_along(othr_ixs)]))
}
# Homogeneity index per clump: the maximum composition fraction, rescaled
# so that chance level (1/7 with seven types) maps to 0 and a pure clump
# maps to 1. Columns 26:32 are assumed to be the *_pct columns -- confirm.
clump_df$max_pct <- clump_df[, 26:32] %>%
apply(1, max)
clump_df$hmg_ix <- (clump_df$max_pct - (1/7)) / (1-1/7)
# Distribution of the index over clumps with more than 3 cells.
clump_df %>%
filter(!is.na(hmg_ix)) %>%
filter(ncell > 3) %>%
ggplot(aes(x=hmg_ix)) +
geom_histogram(color="black", fill="white")
## Bootstrap a null distribution for the clump homogeneity index: for each
## clump with more than 3 cells, repeatedly assign its cells to one of
## nctyp types uniformly at random and recompute the rescaled max fraction.
clump_3 <- clump_df %>% filter(ncell > 3)
nctyp = 7      # number of cell types
btstrp = 1000  # bootstrap iterations
null_dist = matrix(0, nrow = btstrp, ncol = nrow(clump_3))
tic()
for(k in 1:btstrp){
if(k %% 250 == 0){
print(k)  # progress marker every 250 iterations
}
for(i in 1:nrow(clump_3)){
cells = clump_3$ncell[i]
## Draw a uniform random type in 1..nctyp for each cell in the clump.
## GENERALIZED: the type count was hard-coded as 7 despite nctyp being
## defined above; both uses now derive from nctyp (default 7, so the
## computed values are unchanged).
samp <- rdunif(cells, 1, nctyp)
mx_pct <- max(table(samp)) / cells
## Same rescaling as hmg_ix: chance level (1/nctyp) -> 0, pure -> 1.
null_dist[k, i] = (mx_pct - (1 / nctyp)) / (1 - 1 / nctyp)
}
}
toc()
null_dist
hist(null_dist)
null_df <- as.data.frame(as.vector(colMeans(null_dist)))
colnames(null_df) <- 'ixs'
null_df$dist = 'null'
null_df
true_df <- as.data.frame(cbind(clump_3$hmg_ix, 'true'))
colnames(true_df) <- c('ixs', 'dist')
mg_df <- rbind(null_df, true_df)
mg_df$ixs <- as.numeric(mg_df$ixs)
mg_df %>%
ggplot(aes(x=ixs, color=dist)) +
geom_histogram(fill="white", alpha=0.4, position="identity")
jy_all$clump
# Binary in/out-of-clump flag derived from the clump tag set earlier
# ("NaN" marks cells never assigned to a clump).
jy_all$in_clump <- 'Not in Clump'
jy_all$in_clump[which(jy_all$clump != "NaN")] <- 'In Clump'
jy_all$clump_stream <- paste(jy_all$area, jy_all$in_clump)
# avp: slice-of-origin label (164 vs 408) parsed from the area string.
jy_all$avp = 'wrong'
jy_all$avp[which(grepl('164', jy_all$area))] = '164'
jy_all$avp[which(grepl('408', jy_all$area))] = '408'
jy_all$clump_stream2 <- paste(jy_all$avp, jy_all$in_clump)
# NOTE(review): clump_type is only assigned further below; these two lines
# rely on a value left over from a previous interactive run
# (purl/transcript ordering artifact).
jy_all$clump_plot = jy_all$clump
jy_all$clump_plot[which(jy_all$clump_type != "True Clump")] = "NaN"
# Hand-curated morphology classes, keyed by clump name.
linear = c("164_TC_10_12",
"164_TC_10_5",
"164_TC_1_1",
"164_TC_5_4",
"164_TC_7_5",  # NOTE(review): also listed under psuedolinear below
"164_TC_8_7",
"164_TC_9_1",
"164_TC_9_2",
"408_CC_12_5",
"408_TC_10_5",
"408_TC_9_5")
psuedolinear = c("408_Cortical_1_0",
"408_TC_10_3",
"164_TC_1_3",
"164_TC_Cortical1_11",
"164_CC_8_2",
"164_CC_9_3",
"164_CC_L2-1_5",
"164_CC_L2-2_4",
"164_TC_8_2",
"164_TC_8_3",
"164_TC_2_1",
"164_TC_4_9",
"164_TC_5_2",
"164_TC_5_9",
"164_TC_6_0",
"164_TC_7_5",
"164_TC_7_1",
"164_TC_7_4",
"164_TC_8_4",
"408_TC_14_4",
"408_TC_2_0",
"408_TC_2_12",
"408_TC_3_3",
"408_TC_4_11",
"408_TC_4_16",
"408_tc_4_1",  # NOTE(review): lowercase 'tc' -- presumably never matches the uppercase clump names; verify
"408_TC_6_5",
"164_TC_Cortical_2_1")
big_clump = c("408_TC_7_4",
"164_TC_1_0",
"164_TC_3_1",
"408_TC_11_1",
"408_TC_12_0",
"408_TC_13_0",
"408_TC_14_0",
"408_TC_5_3",
"408_TC_15_2",
"408_TC_3_1",
"408_TC_3_0",
"164_TC_6_6",
"408_TC_11_1",  # NOTE(review): duplicate entry (harmless for %in%)
"164_TC_Cortical1_5")
# Classify cells: any clumped cell defaults to Small Clump, then the
# curated lists override it. Later assignments win, so e.g. 164_TC_7_5
# ends up Psuedo-Linear because that list is applied after 'linear'.
jy_all$clump_type = "NaN"
jy_all$clump_type[which(jy_all$clump != "NaN")] = "Small Clump"
jy_all$clump_type[which(jy_all$clump %in% linear)] = "Linear"
jy_all$clump_type[which(jy_all$clump %in% psuedolinear)] = "Psuedo-Linear"
jy_all$clump_type[which(jy_all$clump %in% big_clump)] = "True Clump"
# Two-level summary: True Clump vs everything else.
jy_all$clump_reduced = 'error'
jy_all$clump_reduced[which(jy_all$clump_type %in% c('True Clump'))] = 'Clump'
jy_all$clump_reduced[which(jy_all$clump_type %in% c('NaN', 'Small Clump', 'Linear', 'Psuedo-Linear'))] = 'Individual'
dp <- DotPlot(jy_all, features = c('CXCR4', 'CXCR7', 'EGFR', 'LRP8', 'VLDLR'), group.by = "clump_reduced") + RotatedAxis() + ylab("")
ggsave(plot = dp, filename = 'clump_dot_plot.pdf', path = file.path(output_dir_plot, '20221212_1'), width = 12, height = 5, units = 'cm', dpi = 300)
jy_all$clump_stream <- paste(jy_all$area, jy_all$clump_type)
jy_all$clump_ctype <- paste(Ident(jy_all), jy_all$clump_reduced)
Error in Ident(jy_all) : could not find function "Ident"
#DotPlot(jy_all[, which(grepl("TC", jy_all$area) & grepl("164", jy_all$area))], features = c('CXCR4', 'LRP8', 'CXCR7', 'VLDLR', 'EGFR', 'CXCL12', 'CXCL14', 'RELN'), group.by = "clump_ctype") + RotatedAxis() + coord_flip()
DotPlot(jy_all, features = c('CXCR4', 'LRP8', 'CXCR7', 'VLDLR', 'EGFR', 'CXCL12', 'CXCL14', 'RELN'), group.by = "clump_ctype") + RotatedAxis() + coord_flip()
dp <- DotPlot(jy_all[, which(grepl('MS', jy_all$area))], features = c('CXCR4', 'LRP8', 'CXCR7', 'VLDLR', 'EGFR'), group.by = "clump_stream") + RotatedAxis() + ylab("")
ggsave(plot = dp, filename = 'clump_dot_plot_typestream.pdf', path = file.path(output_dir_plot, '20221212_1'), width = 15, height = 15, units = 'cm', dpi = 300)
For the chord diagram I need, for every ordered pair of cell types, the interaction count from cell type 1 to cell type 2 —
essentially a long-format table with columns (from, to, value),
one row per ordered pair: CT1→CT1, CT1→CT2, CT1→CT3, and so on.
dp <- DotPlot(jy_all[, which(grepl('MS', jy_all$area))], features = c('CXCR4', 'LRP8', 'CXCR7', 'VLDLR', 'EGFR'), group.by = "clump_type") + RotatedAxis() + ylab("")
ggsave(plot = dp, filename = 'clump_dot_plot_type.pdf', path = file.path(output_dir_plot, '20221212_1'), width = 12, height = 10, units = 'cm', dpi = 300)
classes <- levels(Idents(jy_all))
n = length(classes)
c1 = rep(classes, n)
c2 = rep(classes, each=n)
c_df <- as.data.frame(cbind(c1, c2))
XY_164 = Embeddings(jy_164, 'H')
neighbors = 5
nn_graph = nng(XY_164, k = neighbors)
XY_408 = Embeddings(jy_408, 'H')
nn_graph2 = nng(XY_408, k = neighbors)
clusters_164 = Idents(jy_all)[1:ncol(jy_164)]
Idents(jy_164) = clusters_164
clusters_408 = Idents(jy_all)[(ncol(jy_164)+1):ncol(jy_all)]
Idents(jy_408) = clusters_408
fill_adj_matrix <- function(nn_graph, jy_obj, adj_mat, undirected = TRUE){
## Count cell-type -> cell-type adjacencies from a k-nearest-neighbor graph.
##
## nn_graph:   neighbor graph (cccd::nng); nn_graph[[i]]'s first element
##             lists cell i's neighbor indices.
## jy_obj:     Seurat object whose Idents() give each cell's type.
## adj_mat:    data.frame with columns c1/c2 enumerating all ordered type
##             pairs; a 'val' count column is (re)initialized here.
## undirected: if TRUE, each cell pair is counted at most once.
##
## Returns adj_mat with 'val' filled in.
adj_mat$val = 0
## Ledger of already-counted pairs: row (c1 = neighbor, c2 = cell) marks an
## edge so its mirror image is skipped when undirected.
added = as.data.frame(matrix(-1, 1, 2))
colnames(added) = c('c1', 'c2')
idents = Idents(jy_obj)  # hoisted: avoids one Idents() call per cell
for(i in seq_len(ncol(jy_obj))){
class_cell = idents[i]
nearest_neighbors = as.matrix(nn_graph[[i]])
neighbor_idents = idents[nearest_neighbors[[1]]]
for(n in seq_along(neighbor_idents)){
n_id = as.numeric(nearest_neighbors[[1]][n])
neighbor = neighbor_idents[n]
## BUG FIX: the old duplicate check compared a possibly length > 1 vector
## with '==' inside '&&', which errors in R >= 4.3 and silently tested
## only the first element before that. any() over the joint condition is
## the intended "was edge (i, n_id) already counted?" test.
if(undirected && any(added$c1 == i & added$c2 == n_id)){
## duplicate of an edge already counted in the other direction: skip
} else{
pair_ix = which(adj_mat$c1 == class_cell & adj_mat$c2 == neighbor)
adj_mat$val[pair_ix] = adj_mat$val[pair_ix] + 1
added <- added %>% add_row(c1 = n_id, c2 = i)
}
}
}
return(adj_mat)
}
norm_adj_mat <- function(jy_obj, adj_mat, neighbors = 5){
## Normalize raw adjacency counts into per-source-type proportions: each
## row type's counts are divided by (number of cells of that type *
## neighbors), i.e. the total number of outgoing edges that type owns.
##
## jy_obj:    Seurat object; Idents() supply the cell types.
## adj_mat:   data.frame from fill_adj_matrix(); column 1 is the source
##            type, column 3 the raw count.
## neighbors: k used when building the knn graph. GENERALIZED: previously
##            read from a global; now a parameter whose default (5)
##            matches the value used throughout this script.
##
## Returns adj_mat with column 3 rescaled.
idents <- levels(Idents(jy_obj))
for(ident in idents){
id_ct <- sum(Idents(jy_obj) == ident) * neighbors
adj_mat[which(adj_mat$c1 == ident), 3] =
as.numeric(adj_mat[which(adj_mat$c1 == ident), 3]) / id_ct
}
return(adj_mat)
}
adj_mat1 <- fill_adj_matrix(nn_graph, jy_164, c_df)
adj_mat2 <- fill_adj_matrix(nn_graph2, jy_408, c_df)
#adj_mat1p <-norm_adj_mat(jy_164, adj_mat1)
#adj_mat2p <-norm_adj_mat(jy_408, adj_mat2)
adj_mat_final <- as.data.frame(cbind(c1, c2, adj_mat1$val + adj_mat2$val))
#adj_mat_final <- norm_adj_mat(jy_all, adj_mat_final)
colnames(adj_mat_final) <- c('from', 'to', 'value')
adj_mat_final$value <- as.numeric(adj_mat_final$value)
df2 = data.frame(start = c("a", "b", "c", "a"), end = c("a", "a", "b", "c"))
chordDiagram(df2, grid.col = 1:3, self.link = 1)
title("self.link = 1")
chordDiagram(df2, grid.col = 1:3, self.link = 2)
title("self.link = 2")
# Pivot the long (from, to, value) adjacency table into a square matrix
# (rows = source type, columns = target type). NOTE(review): dcast() is
# from reshape2/data.table, which is not attached in the visible header --
# verify it is loaded elsewhere.
mat <- dcast(adj_mat_final, from ~ to, value.var = "value") %>%
column_to_rownames("from") %>%
#mutate_all(.funs = readr::parse_number) %>%
as.matrix()
circos.clear()
# NOTE(review): col_mat is used here but only built two lines below; this
# only works if col_mat survives from a previous run (purl/transcript
# ordering artifact). Edges grey, same-type links colored per cluster.
chordDiagram(mat, self.link = 1, grid.col = get_cluster_colors(rownames(mat)), col = col_mat)
col_mat = matrix("#e5e5e5", nrow = nrow(mat), ncol = ncol(mat))
diag(col_mat) <- get_cluster_colors(rownames(mat))
#dim(col_mat) = dim(mat) # to make sure it is a matrix
XYd_164 = Embeddings(jy_164[, which(grepl('CC', jy_164$area) & grepl('MS', jy_164$area))], 'H')
XYv_164 = Embeddings(jy_164[, which(grepl('TC', jy_164$area) & grepl('MS', jy_164$area))], 'H')
nnd_graph = nng(XYd_164, k = neighbors)
nnv_graph = nng(XYv_164, k = neighbors)
XYd_408 = Embeddings(jy_408[, which(grepl('CC', jy_408$area) & grepl('MS', jy_408$area))], 'H')
XYv_408 = Embeddings(jy_408[, which(grepl('TC', jy_408$area) & grepl('MS', jy_408$area))], 'H')
nnd_graph2 = nng(XYd_408, k = neighbors)
nnv_graph2 = nng(XYv_408, k = neighbors)
stream = 'CC'
adj_mat1 <- fill_adj_matrix(nn_graph,
jy_164[, which(grepl(stream, jy_164$area) & grepl('MS', jy_164$area))],
c_df, undirected = FALSE)
adj_mat2 <- fill_adj_matrix(nn_graph2,
jy_408[, which(grepl(stream, jy_408$area) & grepl('MS', jy_408$area))],
c_df, undirected = FALSE)
adj_mat_final <- as.data.frame(cbind(c1, c2, adj_mat1$val + adj_mat2$val))
#adj_mat_final <- norm_adj_mat(jy_all[, which(grepl(stream, jy_all$area) & grepl('MS', jy_all$area))], adj_mat_final)
colnames(adj_mat_final) <- c('from', 'to', 'value')
adj_mat_final$value = as.numeric(adj_mat_final$value)
check_adj_prob <- function(adj_mat){
  ## Sanity check for a normalized adjacency table: for every source type
  ## in adj_mat$from, print the summed edge weight (column 3) over all
  ## targets -- each sum should be ~1 if normalization succeeded.
  for(cell_type in unique(adj_mat$from)){
    print(sum(adj_mat[adj_mat$from == cell_type, 3]))
  }
}
check_adj_prob(adj_mat_final)
[1] 0.9828729
[1] 0.9936508
[1] 0.9698113
[1] 1
[1] 0.9974359
[1] 1
[1] 1
mat <- dcast(adj_mat_final, from ~ to, value.var = "value") %>%
column_to_rownames("from") %>%
#mutate_all(.funs = readr::parse_number) %>%
as.matrix()
circos.clear()
chordDiagram(mat, self.link = 1, grid.col = get_cluster_colors(rownames(mat)), col = col_mat)
Okay, so I need a node ID for each "unique type" — that is, one node per cell type.
### Need to set up the d: edges
### and set the vertices: which are the node IDs
#adj_mat_final
net <- graph_from_adjacency_matrix(mat, mode = "directed", weighted = TRUE)
V(net)$color <- get_cluster_colors(V(net)$name)
V(net)$size <- log(round(rowSums(mat) / neighbors)) * 7
# The labels are currently node IDs.
# Setting them to NA will render no labels:
V(net)$label <- NA
# Set edge width based on weight:
E(net)$width <- log(E(net)$weight)
#change arrow size and edge color:
E(net)$arrow.size <- .2
E(net)$edge.color <- "gray80"
# We can even set the network layout:
graph_attr(net, "layout") <- layout_with_fr(net)#net, center = 'CGE/LGE')
plot(net, edge.arrow.size=.4, edge.curved=.5)
# Per-image cell-type composition heatmap.
# slice: which tissue slice each cell came from (jy_all holds jy_164's
# cells first, then jy_408's, so positional assignment works).
jy_all$slice <- NA
jy_all$slice[1:ncol(jy_164)] <- '164'
jy_all$slice[(ncol(jy_164)+1):ncol(jy_all)] <- '408'
jy_all$imgslice <- paste0(jy_all$slice, "_", jy_all$image)
ids <- levels(Idents(jy_all))
imgs <- unique(jy_all$imgslice)
jy_matrix <- matrix(-1, length(imgs), length(ids))  # -1 marks unfilled rows
for(i in 1:length(imgs)){
img <- imgs[i]
obj <- jy_all[, which(jy_all$imgslice == img)]
# factor(..., levels = ids) keeps zero counts for types absent from img
tb <- table(factor(Idents(obj), levels = ids))
jy_matrix[i, ] <- tb / sum(tb)  # proportions within the image
}
rownames(jy_matrix) <- imgs
colnames(jy_matrix) <- ids
pheatmap(t(jy_matrix), cellwidth = 10, cellheight = 10, fontsize = 10,
cluster_cols = FALSE, cluster_rows = FALSE)
We found that 69 cells have nearest neighbors in other images. This should not happen; the knn graphs should be built per image in subsequent analyses.
same_img_check <- function(nn_graph, jy_obj){
  ## Count how many cells have at least one nearest neighbor drawn from a
  ## different image than their own. Such cross-image edges are artifacts:
  ## neighborhoods should only be formed within a single image.
  ##
  ## nn_graph: knn graph; nn_graph[[i]]'s first element holds cell i's
  ##           neighbor indices.
  ## jy_obj:   object with per-cell metadata column 'image'.
  ##
  ## Returns the number of offending cells.
  n_crossing = 0
  for(cell_ix in seq_len(ncol(jy_obj))){
    own_img = jy_obj$image[cell_ix]
    neigh = as.matrix(nn_graph[[cell_ix]])
    imgs_of_neighbors = jy_obj$image[neigh[[1]]]
    if(!all(imgs_of_neighbors == own_img)){
      n_crossing = n_crossing + 1
    }
  }
  return(n_crossing)
}
print(same_img_check(nn_graph, jy_164))
[1] 69
get_interactions <- function(knn_graph, obj, classes, c_df){
## Tally directed type -> type neighbor interactions for one image.
##
## knn_graph: knn graph for obj's cells (cccd::nng); knn_graph[[i]]'s
##            first element holds cell i's neighbor indices.
## obj:       Seurat object (cells of a single image).
## classes:   unused here (the type pairs are already enumerated in c_df);
##            kept for interface compatibility with existing callers.
## c_df:      data.frame of all ordered type pairs in columns c1/c2.
##
## Returns c_df with a 'val' column of interaction counts.
c_df$val = 0
idents = Idents(obj)  # hoisted: avoids repeated Idents() calls
for(i in seq_len(ncol(obj))){
class_cell = idents[i]
## BUG FIX: the body previously indexed the global 'nn_graph' instead of
## the 'knn_graph' argument; it only worked because callers happened to
## assign the global immediately before each call.
nearest_neighbors = as.matrix(knn_graph[[i]])
neighbor_idents = idents[nearest_neighbors[[1]]]
for(neighbor in neighbor_idents){
pair_ix = which(c_df$c1 == class_cell & c_df$c2 == neighbor)
c_df$val[pair_ix] = c_df$val[pair_ix] + 1
}
}
return(c_df)
}
### Iterate through all images
### run 100 permutations, and get the p value from the real data
# 99 label permutations + the observed data = 100 samples, so the smallest
# attainable p-value is 1/100 (hence the /(permutations + 1) below).
permutations = 99
neighbors = 5  # k for the per-image knn graphs
### Need to set up the dataframe
# All ordered cell-type pairs (c1 -> c2), one row per pair.
classes <- levels(Idents(jy_all))
n = length(classes)
c1 = rep(classes, n)
c2 = rep(classes, each=n)
c_df <- as.data.frame(cbind(c1, c2))
## will save all into 2 different matrices
# Row = type pair, column = image; filled with per-image values below.
interaction_mat <- matrix(-1, nrow(c_df), length(imgs))
avoidance_mat <- matrix(-1, nrow(c_df), length(imgs))
## Need to create a jy_all embedding
# Stack the per-slice spatial embeddings so jy_all can be subset per image
# and still carry coordinates (cell order matches: 164 first, then 408).
embed_164 <- Embeddings(jy_164, "H")
embed_408 <- Embeddings(jy_408, "H")
jy_all[["H"]] <- CreateDimReducObject(embeddings = rbind(embed_164, embed_408), key = "pixel_", assay = DefaultAssay(jy_all))
# Per-image permutation test: compare observed neighbor-interaction counts
# against counts under random reshuffling of cell-type labels.
for(i in 1:length(imgs)){
print(paste("Permuting image", i, "of", length(imgs)))
img <- imgs[i]
obj <- jy_all[, which(jy_all$imgslice == img)]
### Create nn graph for real
XY_obj = Embeddings(obj, 'H')
nn_graph = nng(XY_obj, k = neighbors)
### Iterate through each cell type
### Compute the mean for this one
# Observed interaction counts for this image
data_cts <- get_interactions(nn_graph, obj, classes, c_df)
obj_idents <- Idents(obj)  # NOTE(review): saved but never restored below
perm_matrix = matrix(NA, nrow(c_df), permutations)
for(permutation in 1:permutations){
# Shuffle type labels over fixed cell positions, then recount
Idents(obj) <- sample(Idents(obj))
perm_cts <- get_interactions(nn_graph, obj, classes, c_df)
perm_matrix[, permutation] <- perm_cts$val
}
# Fraction of permutations with counts <= / >= the observed count.
# NOTE(review): as written, interaction_mat is small when the observed
# count is LOW relative to the null; confirm the intended direction
# before thresholding at 0.05 downstream -- the two matrices may be
# swapped relative to their names.
interaction_mat[, i] <- rowSums(perm_matrix <= data_cts$val) / (permutations + 1)
avoidance_mat[, i] <- rowSums(perm_matrix >= data_cts$val) / (permutations + 1)
## count the identities
# Permutation preserves label counts, so this table matches the original
tbl <- table(Idents(obj))
## If the cell type does not exist in the image, do not count the P value for the
## interaction... obviously
nonexist_ixs <- which(!(c_df$c1 %in% names(tbl)) | !(c_df$c2 %in% names(tbl)))
interaction_mat[nonexist_ixs, i] <- NA
avoidance_mat[nonexist_ixs, i] <- NA
}
[1] "Permuting image 1 of 61"
[1] "Permuting image 2 of 61"
[1] "Permuting image 3 of 61"
[1] "Permuting image 4 of 61"
[1] "Permuting image 5 of 61"
[1] "Permuting image 6 of 61"
[1] "Permuting image 7 of 61"
[1] "Permuting image 8 of 61"
[1] "Permuting image 9 of 61"
[1] "Permuting image 10 of 61"
[1] "Permuting image 11 of 61"
[1] "Permuting image 12 of 61"
[1] "Permuting image 13 of 61"
[1] "Permuting image 14 of 61"
[1] "Permuting image 15 of 61"
[1] "Permuting image 16 of 61"
[1] "Permuting image 17 of 61"
[1] "Permuting image 18 of 61"
[1] "Permuting image 19 of 61"
[1] "Permuting image 20 of 61"
[1] "Permuting image 21 of 61"
[1] "Permuting image 22 of 61"
[1] "Permuting image 23 of 61"
[1] "Permuting image 24 of 61"
[1] "Permuting image 25 of 61"
[1] "Permuting image 26 of 61"
[1] "Permuting image 27 of 61"
[1] "Permuting image 28 of 61"
[1] "Permuting image 29 of 61"
[1] "Permuting image 30 of 61"
[1] "Permuting image 31 of 61"
[1] "Permuting image 32 of 61"
[1] "Permuting image 33 of 61"
[1] "Permuting image 34 of 61"
[1] "Permuting image 35 of 61"
[1] "Permuting image 36 of 61"
[1] "Permuting image 37 of 61"
[1] "Permuting image 38 of 61"
[1] "Permuting image 39 of 61"
[1] "Permuting image 40 of 61"
[1] "Permuting image 41 of 61"
[1] "Permuting image 42 of 61"
[1] "Permuting image 43 of 61"
[1] "Permuting image 44 of 61"
[1] "Permuting image 45 of 61"
[1] "Permuting image 46 of 61"
[1] "Permuting image 47 of 61"
[1] "Permuting image 48 of 61"
[1] "Permuting image 49 of 61"
[1] "Permuting image 50 of 61"
[1] "Permuting image 51 of 61"
[1] "Permuting image 52 of 61"
[1] "Permuting image 53 of 61"
[1] "Permuting image 54 of 61"
[1] "Permuting image 55 of 61"
[1] "Permuting image 56 of 61"
[1] "Permuting image 57 of 61"
[1] "Permuting image 58 of 61"
[1] "Permuting image 59 of 61"
[1] "Permuting image 60 of 61"
[1] "Permuting image 61 of 61"
## Save these laboriously computed matrices
saveRDS(interaction_mat, file.path('/home/aunoy/st/arc_profiling/st_analysis/hand_annotated_data/', 'interaction_mat.rds'))
saveRDS(avoidance_mat, file.path('/home/aunoy/st/arc_profiling/st_analysis/hand_annotated_data/', 'avoidance_mat.rds'))
plot_map <- interaction_mat
plot_map[which(plot_map >= 0.05)] = NA
pheatmap(plot_map, cluster_cols = FALSE, cluster_rows = FALSE, cex = 0.8)
0.05
[1] 0.05
min(rowMeans(interaction_mat, na.rm = TRUE))
[1] 0.3
rownames(avoidance_mat) <- paste0(c_df$c1, "->", c_df$c2)
colnames(avoidance_mat) <- imgs
rownames(interaction_mat) <- paste0(c_df$c1, "->", c_df$c2)
colnames(interaction_mat) <- imgs
vms_labels <- c(paste0("164_", antr_TC_MS), paste0("408_", post_TC_MS))
dms_labels <- c(paste0("164_", antr_CC_MS), paste0("408_", post_CC_MS))
sub_mat <- interaction_mat[, dms_labels]
avgs <- rowMeans(sub_mat < 0.05, na.rm = TRUE)
#avgs
interaction_df <- c_df %>% add_column(value = avgs)
colnames(interaction_df) <- c('from', 'to', 'value')
mat <- dcast(interaction_df, from ~ to, value.var = "value") %>%
column_to_rownames("from") %>%
#mutate_all(.funs = readr::parse_number) %>%
as.matrix()
net <- graph_from_adjacency_matrix(mat, mode = "directed", weighted = TRUE)
V(net)$color <- get_cluster_colors(V(net)$name)
V(net)$size <- table(Idents(jy_all[, which(jy_all$imgslice %in% dms_labels)]))[V(net)$name] / 5
# The labels are currently node IDs.
# Setting them to NA will render no labels:
V(net)$label <- NA
# Set edge width based on weight:
E(net)$width <- E(net)$weight * 15
#change arrow size and edge color:
E(net)$arrow.size <- .2
E(net)$edge.color <- "blue1"
# We can even set the network layout:
graph_attr(net, "layout") <- layout_as_star(net, center = 'CGE/LGE')
plot(net, edge.curved=.1)
pheatmap(avoidance_mat, cluster_cols = FALSE, cluster_rows = FALSE, cex = 0.8)
clusters = levels(Idents(jy_408))
plots <- lapply(1:length(clusters), function(i){
plot_clusters_vertical_spatial(jy_408, cluster = clusters[i], pt.size = 0.1)
})
verts= plot_grid(plotlist = plots, label_size = 1, nrow = 1)
#verts
#ggsave(plot = verts, filename = 'jy_408_spatial_arc_sep.pdf', path = file.path(output_dir_plot, '20221212_1'), width = 7, height = 7, units = 'cm', dpi = 300)
od_idnts = c("MGE", "CGE/LGE", "TBR1+ CGE", "CALB2+DLX2+", "VIP+GAD1+", "SST+LHX6+", "Ex")
pap = plot_clusters_vertical_spatial_no_grid(jy_408, pt.size = 0.1, x_width = 40, force_idents = od_idnts)
#ggsave(plot = pap, filename = 'jy_408_spatial_arc_sep_od.pdf', path = file.path(output_dir_plot, '20221212_1'), width = 7, height = 7, units = 'cm', dpi = 300)
pap